{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train LSTM->XGB on TBI signal data\n",
    "\n",
    "Code to embed the TBI signal data with a previously trained LSTM and then train an XGBoost model on those embeddings to predict future hypoxemia (low SAO2).\n",
    "\n",
    "Note that the data is private and we are unable to make it publicly available in this repo."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load data and setup paths"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"4\"\n",
    "\n",
    "import numpy as np\n",
    "from tbi_downstream_prediction import split_data\n",
    "\n",
    "import keras\n",
    "from keras.utils import multi_gpu_model\n",
    "from keras.layers import Input, LSTM, Dense, Dropout\n",
    "from keras.models import Sequential, load_model, Model\n",
    "from matplotlib import cm, pyplot as plt\n",
    "from sklearn import metrics\n",
    "from os.path import expanduser as eu\n",
    "from os.path import isfile, join\n",
    "import numpy as np\n",
    "import random, time\n",
    "\n",
    "import tensorflow as tf\n",
    "from keras.backend.tensorflow_backend import set_session\n",
    "# Build the session options step by step: grow GPU memory on demand and allow\n",
    "# soft device placement, then register the session with the Keras backend.\n",
    "gpu_opts = tf.GPUOptions(allow_growth=True)\n",
    "config = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_opts)\n",
    "set_session(tf.Session(config=config))\n",
    "\n",
    "# Root of the (private) TBI project tree and the processed hypoxemia data.\n",
    "PATH = \"/homes/gws/hughchen/phase/tbi_subset/\"\n",
    "DPATH = PATH+\"tbi/processed_data/hypoxemia/\"\n",
    "data_type = \"raw[top11]\"\n",
    "\n",
    "# Signal names; their positions are used below to index axis 1 of X_tbi\n",
    "# (assumes the stored array follows this feature order -- TODO confirm).\n",
    "feat_lst = [\"ECGRATE\", \"ETCO2\", \"ETSEV\", \"ETSEVO\", \"FIO2\", \"NIBPD\", \"NIBPM\",\n",
    "            \"NIBPS\",\"PEAK\", \"PEEP\", \"PIP\", \"RESPRATE\", \"SAO2\", \"TEMP1\", \"TV\"]\n",
    "\n",
    "# Exclude these features\n",
    "weird_feat_lst = [\"ETSEV\", \"PIP\", \"PEEP\", \"TV\"]\n",
    "# enumerate gives each kept feature's position directly (no repeated list.index scans).\n",
    "feat_inds = np.array([ind for ind, feat in enumerate(feat_lst) if feat not in weird_feat_lst])\n",
    "feat_lst2 = [feat for feat in feat_lst if feat not in weird_feat_lst]\n",
    "\n",
    "y_tbi = np.load(DPATH+\"tbiy.npy\")\n",
    "X_tbi = np.load(DPATH+\"X_tbi_imp_standard.npy\")\n",
    "\n",
    "# Keep only the retained features, then split into test/valid/train sets.\n",
    "X_tbi2 = X_tbi[:,feat_inds,:]\n",
    "(X_test, y_test, X_valid, y_valid, X_train, y_train) = split_data(DPATH,X_tbi2,y_tbi,flatten=False)\n",
    "\n",
    "# Result directory for this label/LSTM configuration.\n",
    "RESULTPATH = PATH+\"results/\"\n",
    "label_type = \"desat_bool92_5_nodesat\"\n",
    "lstm_type = \"biglstmdropoutv3_{}\".format(label_type)\n",
    "RESDIR = '{}{}/'.format(RESULTPATH, lstm_type)\n",
    "# exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.\n",
    "os.makedirs(RESDIR, exist_ok=True)\n",
    "\n",
    "# Number of device ids listed in CUDA_VISIBLE_DEVICES (set at the top of this cell).\n",
    "GPUNUM = len(os.environ[\"CUDA_VISIBLE_DEVICES\"].split(\",\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load best LSTM model from \"Run LSTM\" and create an embedding model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hyperparameters identifying the tuned LSTM run: they form the saved-model\n",
    "# directory name below, and `drop` is also reused for the embedding layers.\n",
    "opt_name = \"rmsprop\"\n",
    "lr = 0.001\n",
    "drop = 0.5\n",
    "b_size = 1000\n",
    "epoch_num = 200\n",
    "\n",
    "# Fixed hyperparameters\n",
    "print(\"[PROGRESS] Starting create_train_model()\")\n",
    "lookback = 60; h1 = 50; h2 = 50\n",
    "loss_func = \"binary_crossentropy\"\n",
    "\n",
    "# Form the model name (for saving the model)\n",
    "mod_name  = \"multivariate_biglstmdropoutv3_{}n_{}n_{}ep\".format(h1,h2,epoch_num)\n",
    "mod_name += \"_{}opt_{}lr\".format(opt_name,lr)\n",
    "mod_name += \"_{}drop_{}bs\".format(drop,b_size)\n",
    "MODDIR = PATH+\"models/tune_biglstm/\"+mod_name+\"/\"\n",
    "\n",
    "# Load the best model (in terms of validation performance)\n",
    "# NOTE(review): load_min_model_helper is not imported by any earlier cell of this\n",
    "# notebook, so this line appears to fail on a fresh kernel -- confirm which module\n",
    "# provides it and import it explicitly in the first cell.\n",
    "min_mod = load_min_model_helper(MODDIR)\n",
    "\n",
    "########## Form Model/Data #########\n",
    "min_mod_weights = min_mod.get_weights()\n",
    "\n",
    "# The slicing below takes WEIGHTS_PER_LSTM consecutive weight arrays per feature\n",
    "# for the first LSTM layer, followed by the same number per feature for the\n",
    "# second LSTM layer. The second-layer offset used to be hard-coded as 33\n",
    "# (= 3 * 11 features); deriving it keeps it in sync with feat_lst2.\n",
    "n_feats = len(feat_lst2)\n",
    "WEIGHTS_PER_LSTM = 3\n",
    "lstm2_offset = WEIGHTS_PER_LSTM * n_feats\n",
    "assert len(min_mod_weights) >= 2 * lstm2_offset, (\n",
    "    \"expected at least {} weight arrays in the loaded model, got {}\"\n",
    "    .format(2 * lstm2_offset, len(min_mod_weights)))\n",
    "\n",
    "X_train_lst = []; X_valid_lst = []; X_test_lst  = []\n",
    "sig_lst     = []; encoded_lst = []\n",
    "\n",
    "for i in range(n_feats):\n",
    "    # One single-channel input per feature, sliced from the last axis.\n",
    "    X_train_lst.append(X_train[:,:,i:(i+1)])\n",
    "    X_valid_lst.append(X_valid[:,:,i:(i+1)])\n",
    "    X_test_lst.append(X_test[:,:,i:(i+1)])\n",
    "\n",
    "    # Weight arrays of this feature's first- and second-layer LSTM.\n",
    "    w_start = i * WEIGHTS_PER_LSTM\n",
    "    w_stop  = (i + 1) * WEIGHTS_PER_LSTM\n",
    "    lstm1_weights = min_mod_weights[w_start:w_stop]\n",
    "    lstm2_weights = min_mod_weights[(lstm2_offset + w_start):(lstm2_offset + w_stop)]\n",
    "\n",
    "    sig = Input(shape=(lookback,1))\n",
    "    lstm1 = LSTM(h1, recurrent_dropout=drop, return_sequences=True,\n",
    "                 weights=lstm1_weights)\n",
    "    lstm2 = LSTM(h2, recurrent_dropout=drop, dropout=drop,\n",
    "                 weights=lstm2_weights)\n",
    "\n",
    "    # The output of the second LSTM is this feature's embedding.\n",
    "    encoded = lstm2(lstm1(sig))\n",
    "    sig_lst.append(sig); encoded_lst.append(encoded)\n",
    "\n",
    "model = Model(inputs=sig_lst, outputs=encoded_lst)\n",
    "if GPUNUM > 1: model = multi_gpu_model(model,gpus=GPUNUM)\n",
    "# Only predict() is called on this model below; it is compiled with the same\n",
    "# optimizer name and loss as configured above.\n",
    "model.compile(optimizer=opt_name, loss=loss_func)\n",
    "\n",
    "# Create embeddings (one output array per feature)\n",
    "X_train_embed_lst = model.predict(X_train_lst)\n",
    "X_valid_embed_lst = model.predict(X_valid_lst)\n",
    "X_test_embed_lst  = model.predict(X_test_lst)\n",
    "\n",
    "# Join the per-feature embeddings along axis 1 and save one array per split.\n",
    "np.save(DPATH+\"X_train_embed_lstmbigdropoutv3_50n_arr\",np.concatenate(X_train_embed_lst,axis=1))\n",
    "np.save(DPATH+\"X_valid_embed_lstmbigdropoutv3_50n_arr\",np.concatenate(X_valid_embed_lst,axis=1))\n",
    "np.save(DPATH+\"X_test_embed_lstmbigdropoutv3_50n_arr\", np.concatenate(X_test_embed_lst,axis=1))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load embedded data and train XGB model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from tbi_downstream_prediction import *\n",
    "PATH = \"/homes/gws/hughchen/phase/tbi_subset/\"\n",
    "DPATH = PATH+\"tbi/processed_data/hypoxemia/\"\n",
    "RESULTPATH = PATH+\"results/\"\n",
    "\n",
    "# Load the embeddings saved by the previous cell. They get their own names so\n",
    "# the raw X_train/X_valid/X_test from the first cell are not overwritten and\n",
    "# the embedding cell can still be re-run afterwards.\n",
    "X_train_embed = np.load(DPATH+\"X_train_embed_lstmbigdropoutv3_50n_arr.npy\",mmap_mode=\"r\")\n",
    "X_valid_embed = np.load(DPATH+\"X_valid_embed_lstmbigdropoutv3_50n_arr.npy\",mmap_mode=\"r\")\n",
    "X_test_embed  = np.load(DPATH+\"X_test_embed_lstmbigdropoutv3_50n_arr.npy\",mmap_mode=\"r\")\n",
    "\n",
    "# Set important variables\n",
    "label_type = \"desat_bool92_5_nodesat\"; eta = 0.02\n",
    "hosp_data = \"tbi\"; data_type = \"lstm_big_50n[top11]\"\n",
    "mod_type = \"xgb_{}_eta{}\".format(label_type,eta)\n",
    "\n",
    "# Set up result directory (same path as before, built from RESULTPATH;\n",
    "# exist_ok avoids the check-then-create race).\n",
    "RESDIR = '{}{}/'.format(RESULTPATH, mod_type)\n",
    "os.makedirs(RESDIR, exist_ok=True)\n",
    "\n",
    "# Set parameters to train model\n",
    "# y_train/y_valid/y_test are the labels returned by split_data in the first cell.\n",
    "param = {'max_depth':6, 'eta':eta, 'subsample':0.5, 'gamma':1.0,\n",
    "         'min_child_weight':10, 'base_score':y_train.mean(),\n",
    "         'objective':'binary:logistic', 'eval_metric':[\"logloss\"]}\n",
    "\n",
    "# Train and save xgb model\n",
    "train_save_xgb_model(X_train_embed,y_train,X_valid_embed,y_valid,RESDIR,\n",
    "                     param,hosp_data,data_type)\n",
    "\n",
    "# Test xgb model\n",
    "test_xgb_model(X_test_embed,y_test,RESDIR,param,hosp_data,data_type)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
